home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PsL Monthly 1993 December
/
PSL Monthly Shareware CD-ROM (December 1993).iso
/
prgmming
/
dos
/
c
/
tagsgen.exe
/
CTAG.C
< prev
next >
Wrap
C/C++ Source or Header
|
1992-03-28
|
74KB
|
2,057 lines
/*
EPSHeader
File: ctag.c
Author: J. Kercheval
Created: Sun, 07/14/1991 17:24:44
*/
/*
EPSRevision History
J. Kercheval Sat, 07/27/1991 22:08:04 creation
J. Kercheval Sun, 08/18/1991 20:58:13 completion of CGetToken()
J. Kercheval Wed, 08/21/1991 22:34:49 place function recognition
J. Kercheval Wed, 08/21/1991 23:11:17 add defines and macros
J. Kercheval Wed, 08/21/1991 23:54:33 add typedef and class parsing
J. Kercheval Thu, 08/22/1991 23:53:51 add global variables
J. Kercheval Thu, 08/22/1991 23:54:05 add enum, struct, union
J. Kercheval Thu, 08/22/1991 23:54:28 add globals via typedefs
J. Kercheval Sun, 08/25/1991 23:09:28 complete semantic parser
J. Kercheval Tue, 08/27/1991 23:28:34 fix bug in typedef, struct, enum and union declarations
J. Kercheval Sat, 08/31/1991 23:58:03 add prototype parsing
J. Kercheval Tue, 09/03/1991 22:28:55 move many macros to functions
J. Kercheval Tue, 09/03/1991 23:05:34 clean code and consolidate to functions
J. Kercheval Wed, 09/04/1991 00:16:21 add GNU tag output format support
J. Kercheval Sun, 09/08/1991 13:24:53 minor bug fix in function and global variable parser
J. Kercheval Sun, 09/08/1991 21:31:06 fix bug in lexical parser
J. Kercheval Mon, 09/09/1991 21:49:19 fix bug in function parser
J. Kercheval Mon, 09/09/1991 22:39:12 fix bug in define parser
J. Kercheval Tue, 09/10/1991 22:06:09 fix typedef parser
J. Kercheval Wed, 09/11/1991 02:04:48 add extern symbol recognition
J. Kercheval Wed, 09/11/1991 19:49:11 fix bug in function pointer variable declaration
J. Kercheval Wed, 09/11/1991 20:38:13 add support for function pointer variable declarations after first declaration
J. Kercheval Wed, 09/11/1991 21:51:37 move #directive parsing between semantic and lexical parser
J. Kercheval Thu, 09/12/1991 22:44:43 add support for #ifdef blocks to avoid unmatched parens in ToLevelZero parsing
J. Kercheval Wed, 09/18/1991 22:05:02 fix bug in GetToken and DiscardLine
J. Kercheval Thu, 09/19/1991 22:26:09 fix bug in lexical parser when parsing non C syntax files
J. Kercheval Thu, 10/03/1991 18:15:10 add support for Static declarations
J. Kercheval Fri, 10/04/1991 11:13:23 add support for tagging enumeration constants
J. Kercheval Mon, 10/07/1991 09:36:07 create CParseEnumerationConstants()
J. Kercheval Tue, 11/12/1991 21:46:25 add junk filter on token output
J. Kercheval Sat, 03/28/1992 13:50:06 fix a few bugs and add extern "C" parsing
*/
#include <string.h>
#include "ctag.h"
#include "tagio.h"
#include "log.h"
#define CBUFSIZE 4096
#define MAX_TOKEN_LENGTH 4096

/*
 * Character classification.  Each class below has a 256 entry lookup table
 * which CParserInit() fills in from the matching character list.  The
 * lookup macros convert their argument to unsigned char before indexing so
 * that a plain char holding a value above 127 (which is negative where char
 * is signed) can never index in front of the table.
 */

/* macro for determining if character is whitespace */
#define IsWhite(c) ( _C_white_table[(unsigned char) (c)] )
/* the indexed table for white space character lookup */
BOOLEAN _C_white_table[256];
/* list of whitespace characters */
char C_white[] = " \f\t\v\n\r";

/* macro for determining if character is a delimiter */
#define IsDelim(c) ( _C_delim_table[(unsigned char) (c)] )
/* the indexed table for token delimiter lookup */
BOOLEAN _C_delim_table[256];
/* list of token delimiters */
char C_delim[] = " \f\t\v\n\r\"[](){}#;:,.'=-+*/%&|^~!<>?";

/* macro for determining if character is a punctuator */
#define IsPunctuator(c) ( _C_punctuator_table[(unsigned char) (c)] )
/* the indexed table for punctuator character lookup */
BOOLEAN _C_punctuator_table[256];
/* list of punctuators */
char C_punctuator[] = "[](){},;=";
/*
 * The kind of symbol a tag (or a special token) stands for.  The kinds
 * that can be requested for output are matched against the command line
 * switches by CSymbolWanted(); NOP, Extern and Static have no switch of
 * their own there.  The numeric order of the enumerators is left exactly
 * as it was.
 */
enum SymbolTypeEnum {
    NOP,                        /* nothing special */
    Function,
    ProtoType,
    Structure,
    TypeDefinition,
    Macro,
    Enumeration,
    EnumerationConstant,
    Union,
    GlobalVariable,
    Class,
    Define,
    Extern,                     /* the "extern" keyword, see CTokenType() */
    Static                      /* the "static" keyword, see CTokenType() */
};

/* shorthand for the enumeration type */
typedef enum SymbolTypeEnum SymbolType;
/*
 * Read state for the file being scanned: the raw character buffer, the
 * position reached in it and the running location counters which the
 * lexical routines update as they consume characters.
 */
typedef struct BufferStruct {
    /* input buffer handed to CGetToken() and CDiscardLine() */
    char Cbuf[CBUFSIZE + 1];

    /* next character to examine; reset to Cbuf whenever it is refilled */
    char *buffer;

    /* running character count of the input consumed so far */
    long token_char_location;

    /* running line count, bumped for each newline consumed */
    long token_line_location;

    /* offset of the current line; COutputToken() seeks to a saved copy */
    long token_line_offset;

    /* the stream the buffer is refilled from */
    FILE *infile;
} Buffer;
/*
 * State of the token stream.  Two complete token slots (text plus location
 * data) are kept; the cur_ and prev_ pointers select which slot is current
 * and which is previous, and CTokenSwap() exchanges the two sets of
 * pointers.  NOTE(review): the initial wiring of the pointers to the slots
 * is done outside the code visible here -- presumably one set points at
 * slot 1 and the other at slot 2.
 */
typedef struct TokenStruct {
    /* slot 1 */
    char sbuf1[MAX_TOKEN_LENGTH];   /* token text */
    long charloc1;                  /* char location of sbuf1 */
    long tokenline1;                /* line number of sbuf1 */
    long lineoffset1;               /* line offset of sbuf1 */

    /* slot 2 */
    char sbuf2[MAX_TOKEN_LENGTH];   /* token text */
    long charloc2;                  /* char location of sbuf2 */
    long tokenline2;                /* line number of sbuf2 */
    long lineoffset2;               /* line offset of sbuf2 */

    /* the current token */
    char *cur_token;                /* text of the current token */
    long *cur_char_location;        /* char location of the current token */
    long *cur_token_line;           /* line of the current token */
    long *cur_line_offset;          /* line offset of the current token */

    /* the previous token */
    char *prev_token;               /* text of the previous token */
    long *prev_char_location;       /* char location of the previous token */
    long *prev_token_line;          /* line of the previous token */
    long *prev_line_offset;         /* line offset of the previous token */

    /* zeroed by CToPunctuator() and incremented for every token it reads */
    int token_count;

    /* adjusted by #else / #elif / #endif in CParsePreprocessorDirective();
     * zeroed by CToLevelZero() and CParseParens() */
    int else_nesting_level;

    /* minor state for the statement being parsed */
    BOOLEAN extern_active;
    BOOLEAN CPP_extern_active;
    BOOLEAN static_active;
} Token;
/*----------------------------------------------------------------------------
 *
 * CParserInit() builds the three character class tables used by the parser
 * (delimiter, whitespace and punctuator).  Every table entry is first set
 * to FALSE; the entry for each character named in C_delim, C_white and
 * C_punctuator is then set to TRUE in the corresponding table.
 *
 ---------------------------------------------------------------------------*/
void CParserInit()
{
    char *member;               /* walks a character list */
    int slot;                   /* walks the table entries */

    /* start with all three tables empty */
    slot = 0;
    while (slot < 256) {
        _C_punctuator_table[slot] = FALSE;
        _C_white_table[slot] = FALSE;
        _C_delim_table[slot] = FALSE;
        slot++;
    }

    /* mark the whitespace characters */
    member = C_white;
    while (*member) {
        _C_white_table[*member] = TRUE;
        member++;
    }

    /* mark the token delimiters */
    member = C_delim;
    while (*member) {
        _C_delim_table[*member] = TRUE;
        member++;
    }

    /* mark the punctuators */
    member = C_punctuator;
    while (*member) {
        _C_punctuator_table[*member] = TRUE;
        member++;
    }
}
/*----------------------------------------------------------------------------
 *
 * CSymbolWanted() reports whether the flag that belongs to the passed
 * symbol type is set.  TRUE is returned when it is, FALSE when it is not or
 * when the type has no flag in the table below (NOP, Extern, Static).
 *
 *      Flag            Type
 *      ---------       --------------
 *      flags->cf       Function
 *      flags->cp       ProtoType
 *      flags->cs       Structure
 *      flags->ct       TypeDefinition
 *      flags->cm       Macro
 *      flags->ce       Enumeration
 *      flags->ck       EnumerationConstant
 *      flags->cu       Union
 *      flags->cv       GlobalVariable
 *      flags->cc       Class
 *      flags->cd       Define
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CSymbolWanted(SymbolType type, Flags * flags)
{
    switch (type) {
    case Function:
        return flags->cf ? TRUE : FALSE;
    case ProtoType:
        return flags->cp ? TRUE : FALSE;
    case Structure:
        return flags->cs ? TRUE : FALSE;
    case TypeDefinition:
        return flags->ct ? TRUE : FALSE;
    case Macro:
        return flags->cm ? TRUE : FALSE;
    case Enumeration:
        return flags->ce ? TRUE : FALSE;
    case EnumerationConstant:
        return flags->ck ? TRUE : FALSE;
    case Union:
        return flags->cu ? TRUE : FALSE;
    case GlobalVariable:
        return flags->cv ? TRUE : FALSE;
    case Class:
        return flags->cc ? TRUE : FALSE;
    case Define:
        return flags->cd ? TRUE : FALSE;
    default:
        /* a type without a command line switch is never wanted */
        return FALSE;
    }
}
/*----------------------------------------------------------------------------
 *
 * CTokenType() classifies a single token.  The keywords struct, typedef,
 * enum, union, class, extern and static map to Structure, TypeDefinition,
 * Enumeration, Union, Class, Extern and Static respectively; every other
 * token yields NOP.  Only the spelling of the one token is examined, so the
 * caller has to supply any contextual meaning.
 *
 ---------------------------------------------------------------------------*/
SymbolType CTokenType(char *token)
{
    /* the special keywords and the type reported for each */
    static const struct {
        const char *keyword;
        SymbolType kind;
    } special[] = {
        { "struct", Structure },
        { "typedef", TypeDefinition },
        { "enum", Enumeration },
        { "union", Union },
        { "class", Class },
        { "extern", Extern },
        { "static", Static }
    };
    size_t i;

    /* quick rejection: every keyword above starts with one of these */
    if (strchr("cestu", token[0]) == NULL) {
        return NOP;
    }

    for (i = 0; i < sizeof special / sizeof special[0]; i++) {
        if (strcmp(token, special[i].keyword) == 0) {
            return special[i].kind;
        }
    }

    /* do not recognize it as anything special */
    return NOP;
}
/*----------------------------------------------------------------------------
 *
 * CIsDeclarationToken() returns TRUE when the token is spelled exactly like
 * one of the declaration keywords listed below and FALSE otherwise.  Names
 * introduced by the user through typedef are not in the list, so a
 * declaration that starts with such a name is not recognized here.
 *
 ---------------------------------------------------------------------------*/
#define SYMBOL_SIZE 20
BOOLEAN CIsDeclarationToken(char *token)
{
    /* every first character that occurs in the keyword list below */
    static const char first_chars[] = "*ivclsdfuaretp_hn\"";

    /* the keyword list, closed by a NULL entry */
    static const char *const keywords[] = {
        "*",                    /* pointer */
        "\"C\"",                /* C++ extern for C code */
        "int",                  /* integer declaration */
        "void",                 /* void type */
        "char",                 /* character */
        "long",                 /* long integer */
        "short",                /* short integer */
        "double",               /* double floating point */
        "float",                /* floating point */
        "signed",               /* signed integer */
        "unsigned",             /* unsigned integer */
        "auto",                 /* auto variable (local duration) */
        "register",             /* register variable */
        "static",               /* static variable */
        "struct",               /* structure define */
        "union",                /* union define */
        "enum",                 /* enum defined */
        "typedef",              /* type definition */
        "const",                /* constant variable */
        "extern",               /* external declaration */
        "class",                /* class declaration */
        "friend",               /* class modifier */
        "private",              /* class modifier */
        "protected",            /* class modifier */
        "public",               /* class modifier */
        "volatile",             /* volatile qualifier */
        "_based",               /* pointer type */
        "_cdecl",               /* parameter calling sequence, C style */
        "cdecl",                /* parameter calling sequence, C style */
        "_far",                 /* pointer type */
        "far",                  /* pointer type */
        "_huge",                /* pointer type */
        "huge",                 /* pointer type */
        "_near",                /* pointer type */
        "near",                 /* pointer type */
        "_pascal",              /* parameter calling sequence, PASCAL style */
        "pascal",               /* parameter calling sequence, PASCAL style */
        "_fortran",             /* parameter calling sequence, FORTRAN style */
        "_fastcall",            /* parameter calling sequence, via registers */
        NULL
    };
    const char *const *entry;

    /* quick rejection on the first character */
    if (strchr(first_chars, token[0]) == NULL) {
        return FALSE;
    }

    /* compare against each keyword in turn */
    for (entry = keywords; *entry != NULL; entry++) {
        if (strcmp(token, *entry) == 0) {
            return TRUE;
        }
    }

    /* did not find it */
    return FALSE;
}
/*----------------------------------------------------------------------------
 *
 * COutputToken() writes a tag for token->prev_token through OutputTag().
 * Nothing is written when
 *
 *   - the token starts with a delimiter character (junk filter),
 *   - CSymbolWanted() says the token type was not requested,
 *   - the statement is extern, flags->cx is off and the type is not
 *     Function, Define or Macro, or
 *   - the statement is static, flags->ci is off and the type is not Define
 *     or Macro.
 *
 * When flags->og or flags->oe is set the source line holding the token is
 * read back from token_buffer->infile (at *token->prev_line_offset) and
 * passed along; otherwise an empty line is passed.  The read position of
 * the file is restored afterwards.  Failures while fetching the line are
 * logged and the tag is still written, with an empty line if the text
 * could not be read.
 *
 ---------------------------------------------------------------------------*/
void COutputToken(Token * token, Buffer * token_buffer,
                  SymbolType token_type, FILE * outfile,
                  char *infname, Flags * flags)
{
    char line[MAX_TOKEN_LENGTH];    /* the source line of the token */
    long int old_offset;            /* file position to return to */
    size_t line_length;             /* the length of the line */

    /* init */
    line[0] = '\0';

    /* filter any junk tags; the cast keeps a character above 127 from
     * becoming a negative table index */
    if (IsDelim((unsigned char) token->prev_token[0])) {
        return;
    }

    /* check that the symbol is wanted */
    if (!CSymbolWanted(token_type, flags)) {
        return;
    }

    /* return if external and externals not wanted */
    if (token->extern_active && !flags->cx &&
        token_type != Function &&
        token_type != Define &&
        token_type != Macro) {
        return;
    }

    /* return if static and statics are not wanted */
    if (token->static_active && !flags->ci &&
        token_type != Define &&
        token_type != Macro) {
        return;
    }

    /* if Epsilon or GNU output is specified then we need to output the
     * full line */
    if (flags->og || flags->oe) {

        /* store the current file offset, move to the line offset, read
         * the line into a buffer and restore the file offset */
        old_offset = ftell(token_buffer->infile);
        if (old_offset < 0 ||
            fseek(token_buffer->infile,
                  *(token->prev_line_offset), SEEK_SET)) {
            log_message("# COutputToken() -- internal error - continuing");
        }
        else {
            if (fgets(line, MAX_TOKEN_LENGTH,
                      token_buffer->infile) == NULL) {
                /* nothing usable was read; the buffer contents cannot be
                 * relied upon, so fall back to an empty line */
                line[0] = '\0';
                log_message("# COutputToken() -- internal error - continuing");
            }
            else {
                /* strip the trailing newline, if there is one */
                line_length = strlen(line);
                if (line_length > 0 && line[line_length - 1] == '\n') {
                    line[line_length - 1] = '\0';
                }
            }

            if (fseek(token_buffer->infile, old_offset, SEEK_SET)) {
                log_message("# COutputToken() -- internal error - continuing");
            }
        }
    }

    OutputTag(outfile, line,
              token->prev_token, infname,
              *(token->prev_token_line),
              *(token->prev_char_location) -
              strlen(token->prev_token), flags);
}
/*----------------------------------------------------------------------------
 *
 * CGetToken() obtains the next token from the input and stores it, NUL
 * terminated, in token.
 *
 *   infile            stream used to refill the buffer
 *   buffer            in/out: current read position inside Cbuf
 *   Cbuf              the input buffer; it is refilled through FillBuffer()
 *                     whenever a NUL character is reached
 *   token             receives the token text
 *   max_token_length  size of token; at most max_token_length - 1
 *                     characters are stored
 *   line_number       in/out: incremented for every newline consumed
 *   char_number       in/out: incremented for every character consumed
 *   line_offset       out: updated whenever a newline is consumed
 *
 * Leading whitespace and comments (both C and C++ style) are skipped.  A
 * quoted string and a single quoted character are each returned as one
 * token, a delimiter character is returned as a token of its own.
 *
 * TRUE is returned when a token was stored.  FALSE is returned only when
 * the input is exhausted and no token characters had been collected.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CGetToken(FILE * infile,
                  char **buffer,
                  char *Cbuf,
                  char *token,
                  int max_token_length,
                  long int *line_number,
                  long int *char_number,
                  long int *line_offset)
{
    typedef enum parser_state { /* a state of the lexical parser */
        Parse, BeginCommentMaybe, InComment, InCommentEndMaybe, InCPPComment,
        InQuoteNormal, InQuoteLiteral, InSingleQuoteNormal,
        InSingleQuoteLiteral, EndSingleQuote, WhiteSpace, Exit
    } State;

    State current_state;        /* the current state of the parser */
    unsigned char c;            /* the current character being examined;
                                 * unsigned so that it is always a valid
                                 * index for the character class tables */
    char *t;                    /* pointer into token */
    int token_length;           /* the current token length cannot exceed
                                 * max token length */

    /* init */
    current_state = WhiteSpace;
    t = token;
    *t = '\0';
    token_length = 0;

    /* parse the file for the next token */
    while (TRUE) {
        c = (unsigned char) **buffer;

        /* if the buffer has been completely used, refill the buffer, I
         * make the tacit assumption here that the null character is not a
         * member of the source file */
        if (!c) {
            *buffer = Cbuf;
            if (FillBuffer(infile, Cbuf,
                           (long int) CBUFSIZE)) {
                c = (unsigned char) **buffer;
            }
            else {
                /* return the token if it exists */
                if (t != token) {
                    *t = '\0';
                    return TRUE;
                }
                else
                    return FALSE;
            }
        }

        /* react on the state machine.  A state that wants the current
         * character examined again in its new state steps *buffer and
         * *char_number back by one; the increment at the bottom of the
         * loop then lands on the same character. */
        switch (current_state) {

        case Parse:
            switch (c) {
            case '/':
                /* return if we already have a token */
                if (t != token) {
                    (*buffer)--;
                    (*char_number)--;
                    current_state = Exit;
                }
                else {
                    /* this may be the begin of a comment or the division
                     * symbol; the character is stored provisionally and
                     * only kept if no comment follows */
                    current_state = BeginCommentMaybe;
                    *t = (char) c;
                }
                break;
            case '\"':
                /* return if we already have a token */
                if (t != token) {
                    (*buffer)--;
                    (*char_number)--;
                    current_state = Exit;
                }
                else {
                    current_state = InQuoteNormal;
                    *t++ = (char) c;
                    token_length++;
                }
                break;
            case '\'':
                /* return if we already have a token */
                if (t != token) {
                    (*buffer)--;
                    (*char_number)--;
                    current_state = Exit;
                }
                else {
                    current_state = InSingleQuoteNormal;
                    *t++ = (char) c;
                    token_length++;
                }
                break;
            default:
                /* if it is a delimiter then stop processing */
                if (IsDelim(c)) {
                    /* if a token exists then back up in buffer,
                     * otherwise the delimiter itself is the token */
                    if (t != token) {
                        (*buffer)--;
                        (*char_number)--;
                    }
                    else {
                        *t++ = (char) c;
                        token_length++;
                    }
                    current_state = Exit;
                }
                else {
                    /* normal character, store it in the token */
                    *t++ = (char) c;
                    token_length++;
                }
                break;
            }
            break;

        case WhiteSpace:
            /* pass over whitespace, backup one char if no longer in
             * white space region */
            if (!IsWhite(c)) {
                current_state = Parse;
                (*buffer)--;
                (*char_number)--;
            }
            else {
                /* check for newline */
                if (c == '\n') {
                    (*line_number)++;
                    *line_offset = *char_number + *line_number;
                }
            }
            break;

        case BeginCommentMaybe:
            switch (c) {
            case '/':
                current_state = InCPPComment;
                break;
            case '*':
                current_state = InComment;
                break;
            default:
                /* not a comment: keep the '/' stored earlier as the token
                 * and leave the current character for the next call */
                t++;
                token_length++;
                (*buffer)--;
                (*char_number)--;
                current_state = Exit;
                break;
            }
            break;

        case InComment:
            switch (c) {
            case '*':
                /* this is potentially the end of the comment */
                current_state = InCommentEndMaybe;
                break;
            case '\n':
                /* new line just increment state variables */
                (*line_number)++;
                *line_offset = *char_number + *line_number;
                break;
            default:
                break;
            }
            break;

        case InCommentEndMaybe:
            switch (c) {
            case '/':
                /* this is indeed the end of the comment */
                current_state = WhiteSpace;
                break;
            case '*':
                /* this is also perhaps the end of comment */
                break;
            case '\n':
                /* new line just increment state variables */
                (*line_number)++;
                *line_offset = *char_number + *line_number;
                /* fallthrough -- the newline is part of the comment too */
            default:
                /* still part of the current comment */
                current_state = InComment;
                break;
            }
            break;

        case InCPPComment:
            if (c == '\n') {
                current_state = WhiteSpace;
                (*line_number)++;
                *line_offset = *char_number + *line_number;
            }
            break;

        case InQuoteNormal:
            switch (c) {
            case '\"':
                /* end of InQuoteNormal state */
                current_state = Exit;
                break;
            case '\\':
                /* InQuoteLiteral state */
                current_state = InQuoteLiteral;
                break;
            default:
                /* normal dull behavior */
                break;
            }
            *t++ = (char) c;
            token_length++;
            break;

        case InQuoteLiteral:
            /* this char is simply copied */
            current_state = InQuoteNormal;
            *t++ = (char) c;
            token_length++;
            break;

        case InSingleQuoteNormal:
            switch (c) {
            case '\\':
                /* InSingleQuoteLiteral state */
                current_state = InSingleQuoteLiteral;
                break;
            default:
                /* Just copy the character and move to close quote */
                current_state = EndSingleQuote;
                break;
            }
            *t++ = (char) c;
            token_length++;
            break;

        case InSingleQuoteLiteral:
            /* this char is simply copied */
            current_state = EndSingleQuote;
            *t++ = (char) c;
            token_length++;
            break;

        case EndSingleQuote:
            /* end of InSingleQuote states */
            current_state = Exit;
            *t++ = (char) c;
            token_length++;
            break;

        case Exit:
            *t = '\0';
            return TRUE;

        default:               /* not reached */
            break;
        }

        /* if the token_length has gotten too large then return.  The
         * position is advanced first, exactly as the bottom of the loop
         * would have done, so that the character just handled is not read
         * a second time by the next call. */
        if (token_length == max_token_length - 1) {
            (*buffer)++;
            (*char_number)++;
            *t = '\0';
            return TRUE;
        }

        /* move to the next buffer location */
        (*buffer)++;
        (*char_number)++;
    }
}
/*----------------------------------------------------------------------------
 *
 * CFillToken() asks the lexical parser for the next token, which is stored
 * in token->cur_token.  When a token was obtained the current location
 * variables of the Token structure are refreshed from the buffer state.
 * The result of the lexical parser is returned unchanged.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CFillToken(Token * token, Buffer * token_buffer)
{
    BOOLEAN have_token;

    have_token = CGetToken(token_buffer->infile,
                           &(token_buffer->buffer),
                           token_buffer->Cbuf,
                           token->cur_token,
                           MAX_TOKEN_LENGTH,
                           &(token_buffer->token_line_location),
                           &(token_buffer->token_char_location),
                           &(token_buffer->token_line_offset));

    /* nothing to record when no token came back */
    if (!have_token) {
        return have_token;
    }

    /* remember where the new token ended */
    *(token->cur_line_offset) = token_buffer->token_line_offset;
    *(token->cur_token_line) = token_buffer->token_line_location;
    *(token->cur_char_location) = token_buffer->token_char_location;

    return have_token;
}
/*----------------------------------------------------------------------------
 *
 * CTokenSwap() exchanges the cur_ and prev_ pointer sets of the Token
 * structure (text, character location, line and line offset), so that what
 * was the current token becomes the previous one and vice versa.  Only the
 * pointers are exchanged, nothing is copied.
 *
 ---------------------------------------------------------------------------*/
void CTokenSwap(Token * token)
{
    char *held_text;            /* holds a text pointer during the swap */
    long int *held_value;       /* holds a location pointer during the swap */

    /* line offset */
    held_value = token->prev_line_offset;
    token->prev_line_offset = token->cur_line_offset;
    token->cur_line_offset = held_value;

    /* line number */
    held_value = token->prev_token_line;
    token->prev_token_line = token->cur_token_line;
    token->cur_token_line = held_value;

    /* character location */
    held_value = token->prev_char_location;
    token->prev_char_location = token->cur_char_location;
    token->cur_char_location = held_value;

    /* token text */
    held_text = token->prev_token;
    token->prev_token = token->cur_token;
    token->cur_token = held_text;
}
/*----------------------------------------------------------------------------
 *
 * CDiscardLine() consumes characters up to and including the next newline
 * that is not preceded by a line continuation character (a backslash
 * directly in front of the newline; a carriage return between the two is
 * tolerated).  Continued lines are therefore discarded together with the
 * line they continue.
 *
 *   infile, buffer, Cbuf   the input state, used as in CGetToken(); the
 *                          buffer is refilled through FillBuffer() when a
 *                          NUL character is reached
 *   line_number            in/out: incremented for every newline consumed
 *   char_number            in/out: incremented for every character consumed
 *   line_offset            out: updated for every newline consumed
 *
 * TRUE is returned if the very first character examined was a '('.  This
 * is useful for determining if a #define is a macro or a simple define.
 * Reaching the end of the input also ends the routine.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN
CDiscardLine(FILE * infile, char **buffer, char *Cbuf,
             long int *line_number, long int *char_number,
             long int *line_offset)
{
    char c;                     /* the current character being examined */
    BOOLEAN line_continue;      /* TRUE while the characters seen so far
                                 * would continue the line if a newline
                                 * came next */
    BOOLEAN is_macro;           /* TRUE if the first char is '(' */
    BOOLEAN first_char;         /* TRUE until the first char was examined */

    /* init */
    line_continue = FALSE;
    is_macro = FALSE;
    first_char = TRUE;

    /* loop until non continued EOL encountered */
    for (;;) {
        c = **buffer;

        /* if the buffer has been completely used, refill the buffer, I
         * make the tacit assumption here that the null character is not a
         * member of the source file */
        if (!c) {
            *buffer = Cbuf;
            if (!FillBuffer(infile, Cbuf,
                            (long int) CBUFSIZE)) {
                /* end of file reached */
                return is_macro;
            }
            c = **buffer;
        }

        /* the character is consumed */
        (*buffer)++;
        (*char_number)++;

        /* only the first character decides between macro and define */
        if (first_char) {
            if (c == '(')
                is_macro = TRUE;
            first_char = FALSE;
        }

        if (c == '\n') {
            /* handle the newline */
            (*line_number)++;
            *line_offset = *char_number + *line_number - 1;

            /* an uncontinued newline ends the line */
            if (!line_continue)
                break;

            /* the continuation is used up, carry on with the next line */
            line_continue = FALSE;
        }
        else if (c == '\\') {
            line_continue = TRUE;
        }
        else if (c != '\r') {
            /* anything else between a backslash and the newline means the
             * backslash was not a line continuation */
            line_continue = FALSE;
        }
    }

    return is_macro;
}
/*----------------------------------------------------------------------------
 *
 * CParseDefine() handles the remainder of a #define directive.  The next
 * token is taken as the defined name and moved to the prev_ slot, the rest
 * of the line is discarded, and the name is output as a Macro if the
 * character directly after it was a '(' and as a Define otherwise.
 * Nothing happens if no token could be read.
 *
 ---------------------------------------------------------------------------*/
void CParseDefine(Token * token, Buffer * token_buffer,
                  FILE * outfile, char *infname, Flags * flags)
{
    BOOLEAN has_parameter_list;

    /* the defined name */
    if (!CFillToken(token, token_buffer)) {
        return;
    }

    /* COutputToken() works on the previous token */
    CTokenSwap(token);

    /* drop the replacement text, noting whether it opened with '(' */
    has_parameter_list =
        CDiscardLine(token_buffer->infile,
                     &(token_buffer->buffer),
                     token_buffer->Cbuf,
                     &(token_buffer->token_line_location),
                     &(token_buffer->token_char_location),
                     &(token_buffer->token_line_offset));

    /* output the token under the fitting type */
    COutputToken(token, token_buffer,
                 has_parameter_list ? Macro : Define,
                 outfile, infname, flags);
}
/*----------------------------------------------------------------------------
 *
 * CParsePreprocessorDirective() is called once a '#' token has been read.
 * The next token is taken as the directive name:
 *
 *   define        handed to CParseDefine(), which also discards the line
 *   else, elif    token->else_nesting_level is incremented
 *   endif         token->else_nesting_level is decremented unless zero
 *
 * For everything but define the rest of the directive line is discarded.
 * Nothing happens if no token could be read.
 *
 * NOTE(review): a conditional holding both an #elif and an #else raises
 * the level twice while its #endif lowers it once -- confirm this is the
 * intended bookkeeping.
 *
 ---------------------------------------------------------------------------*/
void CParsePreprocessorDirective(Token * token, Buffer * token_buffer,
                                 FILE * outfile, char *infname, Flags * flags)
{
    const char *directive;

    if (!CFillToken(token, token_buffer)) {
        return;
    }
    directive = token->cur_token;

    /* a define directive is handled completely by CParseDefine() */
    if (strcmp(directive, "define") == 0) {
        CParseDefine(token, token_buffer, outfile, infname, flags);
        return;
    }

    /* keep track of the else block level */
    if (strcmp(directive, "else") == 0 ||
        strcmp(directive, "elif") == 0) {
        token->else_nesting_level++;
    }
    else if (strcmp(directive, "endif") == 0) {
        if (token->else_nesting_level)
            token->else_nesting_level--;
    }

    /* remove the rest of the directive line */
    CDiscardLine(token_buffer->infile,
                 &(token_buffer->buffer),
                 token_buffer->Cbuf,
                 &(token_buffer->token_line_location),
                 &(token_buffer->token_char_location),
                 &(token_buffer->token_line_offset));
}
/*----------------------------------------------------------------------------
 *
 * CNextToken() obtains the next token meant for the semantic parser.
 * Tokens are read with CFillToken(); whenever one starts with '#' the
 * directive is handled by CParsePreprocessorDirective() and another token
 * is read.  The result of the last CFillToken() call is returned.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CNextToken(Token * token, Buffer * token_buffer, FILE * outfile,
                   char *infname, Flags * flags)
{
    BOOLEAN token_found;

    for (;;) {
        token_found = CFillToken(token, token_buffer);

        /* anything but a directive goes to the caller, as does failure */
        if (!token_found || token->cur_token[0] != '#') {
            break;
        }

        /* parse the directive and loop back to get another token */
        CParsePreprocessorDirective(token, token_buffer,
                                    outfile, infname, flags);
    }

    return token_found;
}
/*----------------------------------------------------------------------------
 *
 * CToLevelZero() starts at nesting level one and reads tokens until the
 * level has dropped back to zero or no more tokens are available.  Tokens
 * starting with '{', '[' or '(' raise the level, tokens starting with ')',
 * ']' or '}' lower it.  Preprocessor directives are handed to
 * CParsePreprocessorDirective(), and braces are not counted while
 * token->else_nesting_level (zeroed on entry) is non zero.  Tokens are
 * read with CGetToken() directly, so the token location variables are not
 * updated here.
 *
 ---------------------------------------------------------------------------*/
void CToLevelZero(Token * token, Buffer * token_buffer,
                  FILE * outfile, char *infname, Flags * flags)
{
    int depth = 1;              /* current brace nesting level */
    char lead;                  /* first character of the token */

    token->else_nesting_level = 0;

    while (depth) {
        if (!CGetToken(token_buffer->infile, &(token_buffer->buffer),
                       token_buffer->Cbuf, token->cur_token,
                       MAX_TOKEN_LENGTH,
                       &(token_buffer->token_line_location),
                       &(token_buffer->token_char_location),
                       &(token_buffer->token_line_offset))) {
            /* out of tokens */
            break;
        }
        lead = token->cur_token[0];

        if (lead == '#') {
            CParsePreprocessorDirective(token, token_buffer,
                                        outfile, infname, flags);
            continue;
        }

        /* only count open brace, parens and brackets within blocks of one
         * element of an ifdef code block */
        if (token->else_nesting_level) {
            continue;
        }

        if (strchr("{[(", lead)) {
            depth++;
        }
        else if (strchr(")]}", lead)) {
            depth--;
        }
    }
}
/*----------------------------------------------------------------------------
 *
 * CToPunctuator() reads tokens until one starts with a punctuator
 * character.  Before each read the current token is swapped into the prev_
 * slot and token->token_count (zeroed on entry) is incremented.  TRUE is
 * returned when a punctuator was reached, FALSE when the tokens ran out
 * first.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CToPunctuator(Token * token, Buffer * token_buffer, FILE * outfile,
                      char *infname, Flags * flags)
{
    token->token_count = 0;

    for (;;) {
        token->token_count++;
        CTokenSwap(token);

        /* no more tokens, no punctuator */
        if (!CNextToken(token, token_buffer, outfile, infname, flags)) {
            return FALSE;
        }

        if (IsPunctuator(token->cur_token[0])) {
            return TRUE;
        }
    }
}
/*----------------------------------------------------------------------------
 *
 * CParseParens() reads through a parenthesized declaration, starting just
 * after its opening '(' and stopping when the matching ')' has been read
 * or the tokens run out.  Every token that is not '(', ')' or '[' is
 * swapped into the prev_ slot; a '[' is skipped up to its closing bracket
 * with CToLevelZero().  Tokens read while token->else_nesting_level (zeroed
 * on entry) is non zero are ignored.  TRUE is returned if a '[' was seen
 * and FALSE otherwise.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN CParseParens(Token * token, Buffer * token_buffer, FILE * outfile,
                     char *infname, Flags * flags)
{
    int open_parens = 1;        /* parentheses still to be closed */
    BOOLEAN more_tokens = TRUE;
    BOOLEAN saw_bracket = FALSE;
    char lead;                  /* first character of the token */

    token->else_nesting_level = 0;

    while (open_parens && more_tokens) {
        more_tokens = CNextToken(token, token_buffer, outfile,
                                 infname, flags);
        if (!more_tokens || token->else_nesting_level) {
            continue;
        }

        lead = token->cur_token[0];
        if (lead == '(') {
            open_parens++;
        }
        else if (lead == ')') {
            /* a surplus ')' would be a syntax error and is ignored */
            if (open_parens) {
                open_parens--;
            }
        }
        else if (lead == '[') {
            /* move to end of array bounds */
            saw_bracket = TRUE;
            CToLevelZero(token, token_buffer, outfile,
                         infname, flags);
        }
        else {
            CTokenSwap(token);
        }
    }

    return saw_bracket;
}
/*----------------------------------------------------------------------------
 *
 * COutputCommaDelimitedToken() outputs the previous token through
 * COutputToken() and then reads on until the current token starts with ','
 * or ';', or until the tokens run out.  An opening brace, bracket or
 * parenthesis met on the way is skipped with CToLevelZero().
 *
 ---------------------------------------------------------------------------*/
void COutputCommaDelimitedToken(Token * token, Buffer * token_buffer,
                                SymbolType token_type, FILE * outfile,
                                char *infname, Flags * flags)
{
    BOOLEAN more = TRUE;        /* FALSE once the tokens have run out */
    char lead;                  /* first character of the current token */

    /* output the token */
    COutputToken(token, token_buffer, token_type,
                 outfile, infname, flags);

    /* go to the next list punctuator (',' or ';') */
    for (;;) {
        lead = token->cur_token[0];
        if (lead == ',' || lead == ';' || !more) {
            break;
        }
        if (strchr("{[(", lead)) {
            CToLevelZero(token, token_buffer, outfile, infname, flags);
        }
        more = CToPunctuator(token, token_buffer, outfile,
                             infname, flags);
    }
}
/*----------------------------------------------------------------------------
 *
 * CParseCommaDelimitedList() walks a comma separated declaration list
 * until the current token starts with ';' or the tokens run out.  For each
 * punctuator reached:
 *
 *   '('                  the parenthesized declarator is read with
 *                        CParseParens() and then treated like the
 *                        punctuators below
 *   '[' ',' ';' '='      the previous token is output with the passed type
 *                        and the rest of that list entry is skipped up to
 *                        the next ',' or ';'
 *   anything else        ignored
 *
 ---------------------------------------------------------------------------*/
void CParseCommaDelimitedList(Token * token, Buffer * token_buffer,
                              SymbolType token_type, FILE * outfile,
                              char *infname, Flags * flags)
{
    BOOLEAN more = TRUE;        /* FALSE once the tokens have run out */
    char lead;                  /* the punctuator that was reached */

    while (token->cur_token[0] != ';' && more) {
        more = CToPunctuator(token, token_buffer, outfile,
                             infname, flags);
        if (!more) {
            continue;
        }

        lead = token->cur_token[0];
        if (lead == '(') {
            /* an embedded variable declaration, either a complex variable
             * pointer or function pointer; pick out the internal token */
            CParseParens(token, token_buffer, outfile,
                         infname, flags);
        }
        else if (lead != '[' && lead != ',' &&
                 lead != ';' && lead != '=') {
            /* not an ending token for this type of declaration list */
            continue;
        }

        /* output the name and parse to the next correct punctuator */
        COutputToken(token, token_buffer, token_type,
                     outfile, infname, flags);
        while (token->cur_token[0] != ',' &&
               token->cur_token[0] != ';' &&
               more) {
            if (strchr("{[(", token->cur_token[0])) {
                CToLevelZero(token, token_buffer, outfile,
                             infname, flags);
            }
            more = CToPunctuator(token, token_buffer,
                                 outfile, infname,
                                 flags);
        }
    }
}
/*----------------------------------------------------------------------------
*
* CParseFunctionOrGlobalVariable() will parse a function, prototype or
* global variable syntax.
*
---------------------------------------------------------------------------*/
/* Copy a previously saved token (text and position data) back into the
 * "previous token" slot of the token state. */
static void CFuncRestorePrevToken(Token * token, const char *name,
                                  long int charloc, long int line,
                                  long int offset)
{
    strcpy(token->prev_token, name);
    *(token->prev_char_location) = charloc;
    *(token->prev_token_line) = line;
    *(token->prev_line_offset) = offset;
}

/* Advance to the next '{' and then run back to level zero, which steps
 * over the function body.  If no '{' is found before the punctuators run
 * out, nothing more is done. */
static void CFuncSkipBody(Token * token, Buffer * token_buffer,
                          FILE * outfile, char *infname, Flags * flags)
{
    BOOLEAN more = TRUE;

    while (token->cur_token[0] != '{' && more) {
        more = CToPunctuator(token, token_buffer, outfile, infname, flags);
    }
    if (more) {
        CToLevelZero(token, token_buffer, outfile, infname, flags);
    }
}

void CParseFunctionOrGlobalVariable(Token * token, Buffer * token_buffer,
                                    FILE * outfile, char *infname,
                                    Flags * flags)
{
    char saved_name[MAX_TOKEN_LENGTH];  /* token seen before the '(' */
    long int saved_charloc = 0;         /* its character location */
    long int saved_line = 1;            /* its line number */
    long int saved_offset = 0;          /* its line offset */
    BOOLEAN prev_is_keyword;    /* token before '(' is a declaration token */
    BOOLEAN paren_has_variable; /* result of CParseParens() */
    char lead;                  /* first char of the token after the parens */

    saved_name[0] = '\0';

    /* If the token in front of the '(' is not a known declaration token it
     * may be the function name (or a typedef'd type name, which cannot be
     * told apart here), so remember it together with its position.
     * NOTE(review): assumes prev_token fits in MAX_TOKEN_LENGTH bytes --
     * confirm against the Token definition. */
    prev_is_keyword = CIsDeclarationToken(token->prev_token);
    if (!prev_is_keyword) {
        strcpy(saved_name, token->prev_token);
        saved_charloc = *(token->prev_char_location);
        saved_line = *(token->prev_token_line);
        saved_offset = *(token->prev_line_offset);
    }

    /* work through the parenthesised part */
    paren_has_variable = CParseParens(token, token_buffer, outfile,
                                      infname, flags);

    /* The token following the parens decides what this statement is:
     *   ';'            variable if the token before the parens was a known
     *                  declaration token, otherwise a prototype
     *   '('            variable if the first parens held a variable,
     *                  otherwise prototype, function or function pointer
     *   '[' '=' ','    variable declaration
     *   anything else  function definition */
    if (!CNextToken(token, token_buffer, outfile, infname, flags)) {
        return;
    }
    lead = token->cur_token[0];

    if (lead == ';') {
        /* This guess is wrong when the leading token is a typedef'd name,
         * but simple variables are rarely wrapped in parens. */
        if (prev_is_keyword) {
            COutputToken(token, token_buffer, GlobalVariable,
                         outfile, infname, flags);
        }
        else {
            /* prototype: put the saved name back before outputting */
            CFuncRestorePrevToken(token, saved_name, saved_charloc,
                                  saved_line, saved_offset);
            COutputToken(token, token_buffer, ProtoType,
                         outfile, infname, flags);
        }
    }
    else if (lead == '(') {
        if (paren_has_variable) {
            /* variable declaration */
            COutputCommaDelimitedToken(token, token_buffer, GlobalVariable,
                                       outfile, infname, flags);
            CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                     outfile, infname, flags);
            return;
        }

        /* step over the second set of parens and look at what follows */
        CToLevelZero(token, token_buffer, outfile, infname, flags);
        if (!CNextToken(token, token_buffer, outfile, infname, flags)) {
            return;
        }

        if (token->cur_token[0] == '=') {
            /* function pointer variable declaration */
            COutputCommaDelimitedToken(token, token_buffer, GlobalVariable,
                                       outfile, infname, flags);
            CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                     outfile, infname, flags);
        }
        else if (token->cur_token[0] == ';') {
            /* prototype */
            COutputToken(token, token_buffer, ProtoType,
                         outfile, infname, flags);
        }
        else {
            /* function definition: tag it and step over its body */
            COutputToken(token, token_buffer, Function,
                         outfile, infname, flags);
            CFuncSkipBody(token, token_buffer, outfile, infname, flags);
        }
    }
    else if (lead == '[' || lead == '=' || lead == ',') {
        /* global variables */
        COutputCommaDelimitedToken(token, token_buffer, GlobalVariable,
                                   outfile, infname, flags);
        CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                 outfile, infname, flags);
    }
    else {
        /* function definition: put the saved name back, tag it and step
         * over its body */
        CFuncRestorePrevToken(token, saved_name, saved_charloc,
                              saved_line, saved_offset);
        COutputToken(token, token_buffer, Function,
                     outfile, infname, flags);
        CFuncSkipBody(token, token_buffer, outfile, infname, flags);
    }
}
/*----------------------------------------------------------------------------
*
* CParseNOP() will parse an as of yet unrecognized statement. If I run into
* a punctuator at this time then I have found either a structure declaration
* (C++ 2.0), or a global variable declaration. If the punctuator is '[',
* ',', '=', or ';' then it is a global variable declaration. If the
* punctuator is a '{' then we have a structure declaration. A '(' is handed
* to CParseFunctionOrGlobalVariable() and the string "C" marks a C++
* extern "C" statement. At this time we should not run into any closing
* punctuators or the syntax is in a bad way.
*
---------------------------------------------------------------------------*/
void CParseNOP(Token * token, Buffer * token_buffer, FILE * outfile,
               char *infname, Flags * flags)
{
    const char lead = token->cur_token[0];  /* decides the statement kind */

    if (lead == ';' || lead == '=' || lead == ',' || lead == '[') {
        /* global variable declaration: tag the first name, then the rest
         * of the comma delimited list */
        COutputCommaDelimitedToken(token, token_buffer, GlobalVariable,
                                   outfile, infname, flags);
        CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                 outfile, infname, flags);

        token->extern_active = FALSE;
        token->CPP_extern_active = FALSE;
        token->static_active = FALSE;
    }
    else if (lead == '{') {
        /* a brace that opens a C++ extern "C" statement is left alone,
         * and the extern/static state is kept as it is */
        if (token->CPP_extern_active) {
            return;
        }

        /* otherwise this is a structure in C++ syntax: tag it and step
         * over its body */
        COutputToken(token, token_buffer, Structure,
                     outfile, infname, flags);
        CToLevelZero(token, token_buffer, outfile, infname, flags);

        /* any names after the closing brace are global variables */
        if (CNextToken(token, token_buffer, outfile, infname, flags)) {
            CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                     outfile, infname, flags);
        }

        token->extern_active = FALSE;
        token->static_active = FALSE;
    }
    else if (lead == '(') {
        CParseFunctionOrGlobalVariable(token, token_buffer, outfile,
                                       infname, flags);

        token->extern_active = FALSE;
        token->CPP_extern_active = FALSE;
        token->static_active = FALSE;
    }
    else if (lead == '"') {
        /* the string literal "C" switches on the C++ extern state */
        if (strcmp("\"C\"", token->cur_token) == 0) {
            token->CPP_extern_active = TRUE;
        }
    }
    /* anything else is a true NOP */
}
/*----------------------------------------------------------------------------
*
* CParseEnumerationConstants() will parse constants within an enumeration
* declaration
*
---------------------------------------------------------------------------*/
void CParseEnumerationConstants(Token *token, Buffer *token_buffer,
                                FILE *outfile, char *infname,
                                Flags *flags)
{
    BOOLEAN punctuator_found;   /* FALSE once no further punctuator exists */
    char open_brace[] = "({[";  /* open brace set */

    /* obtain the enumeration constants; the walk stops at the '}' closing
     * the enumerator list or when the punctuators run out */
    punctuator_found = TRUE;
    while (token->cur_token[0] != '}' &&
           punctuator_found) {
        punctuator_found = CToPunctuator(token, token_buffer, outfile,
                                         infname, flags);
        if (punctuator_found) {
            switch (token->cur_token[0]) {
                case ',':
                case '=':
                    /* this is one of the proper ending tokens for this
                     * type of declaration list, so output it and parse
                     * to the next correct punctuator */
                    COutputToken(token, token_buffer, EnumerationConstant,
                                 outfile, infname, flags);
                    while (token->cur_token[0] != ',' &&
                           token->cur_token[0] != '}' &&
                           punctuator_found) {
                        if (strchr(open_brace, token->cur_token[0])) {
                            CToLevelZero(token, token_buffer, outfile,
                                         infname, flags);
                        }
                        punctuator_found = CToPunctuator(token, token_buffer,
                                                         outfile, infname,
                                                         flags);
                    }
                    break;
                case '}':
                    /* The closing brace was reached straight from a name,
                     * as in "enum { A, B, C }": the last enumerator has
                     * neither a ',' nor an '=' after it and would otherwise
                     * never be output.  Skip the output when the brace
                     * follows a trailing ',' or the opening '{' (empty
                     * list), or when there is no previous token at all.
                     * An enumerator with an initializer never gets here:
                     * its '=' case above already consumed up to the '}'. */
                    if (token->prev_token[0] != ',' &&
                        token->prev_token[0] != '{' &&
                        token->prev_token[0] != '\0') {
                        COutputToken(token, token_buffer,
                                     EnumerationConstant,
                                     outfile, infname, flags);
                    }
                    break;
                default:
                    break;
            }
        }
    }
}
/*----------------------------------------------------------------------------
*
* CParseDeclarationStatement() will parse struct, enum and union
* declarations. Take the token just before the first punctuator, run
* through the top level braces and parse for variables. If the first
* punctuator is a ';', '=', ',' or '[' then this is a global variable
* declaration; if it is a '{' then this is a struct, enum or union
* declaration, possibly followed by a list of variables; if it is a '('
* then the statement is handed to CParseFunctionOrGlobalVariable().
*
---------------------------------------------------------------------------*/
void CParseDeclarationStatement(Token * token, Buffer * token_buffer,
                                SymbolType type, FILE * outfile,
                                char *infname, Flags * flags)
{
    char lead;                  /* first punctuator after the keyword */

    /* nothing to do when no punctuator follows the keyword */
    if (!CToPunctuator(token, token_buffer, outfile, infname, flags)) {
        return;
    }
    lead = token->cur_token[0];

    if (lead == '(') {
        CParseFunctionOrGlobalVariable(token, token_buffer,
                                       outfile, infname, flags);
        return;
    }

    if (lead == '{') {
        /* a real object declaration; it is tagged unless the token count
         * is exactly one, which the original treats as a variable
         * declaration */
        if (token->token_count != 1) {
            COutputToken(token, token_buffer, type,
                         outfile, infname, flags);
        }

        /* enumerations have their constants parsed, everything else is
         * stepped over */
        if (type == Enumeration) {
            CParseEnumerationConstants(token, token_buffer,
                                       outfile, infname, flags);
        }
        else {
            CToLevelZero(token, token_buffer, outfile, infname, flags);
        }

        /* whatever follows the closing brace is a list of variables; the
         * first name is not output separately on this path */
        if (!CNextToken(token, token_buffer, outfile, infname, flags)) {
            return;
        }
        CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                 outfile, infname, flags);
        return;
    }

    if (lead == ';' || lead == '=' || lead == ',' || lead == '[') {
        /* plain variable declaration: tag the first name, then the rest
         * of the list */
        COutputCommaDelimitedToken(token, token_buffer, GlobalVariable,
                                   outfile, infname, flags);
        CParseCommaDelimitedList(token, token_buffer, GlobalVariable,
                                 outfile, infname, flags);
    }
    /* any other punctuator is ignored */
}
/*----------------------------------------------------------------------------
*
* CParseTypeDefinition() parses the typedef statement. take the token just
* before the first *correct* punctuator, the ';', ',' or the '['. Tag any
* declarations being done here, get the next token
*
---------------------------------------------------------------------------*/
void CParseTypeDefinition(Token * token, Buffer * token_buffer, FILE * outfile,
                          char *infname, Flags * flags)
{
    SymbolType leading_type;    /* type of the first token after typedef */
    BOOLEAN have_token;         /* result of the latest CNextToken() */
    BOOLEAN after_parens = FALSE;       /* previous token was a paren group */
    BOOLEAN func_typedef_tagged = FALSE;/* name already output at a '(' */
    int tokens_seen = 0;        /* tokens consumed by the loop so far */
    char lead;

    have_token = CNextToken(token, token_buffer, outfile, infname, flags);
    if (!have_token) {
        return;
    }

    /* remember what kind of token opens the typedef for later use */
    leading_type = CTokenType(token->cur_token);

    /* walk the typedef up to the first ';', ',' or '[' */
    while (have_token && !func_typedef_tagged) {
        lead = token->cur_token[0];
        if (lead == ';' || lead == ',' || lead == '[') {
            break;
        }

        if (lead == '{') {
            /* more than one token before the brace means the struct, enum
             * or union being defined is named, so tag that name */
            if (tokens_seen > 1 &&
                (leading_type == Structure ||
                 leading_type == Enumeration ||
                 leading_type == Union)) {
                COutputToken(token, token_buffer, leading_type,
                             outfile, infname, flags);
            }
            if (leading_type == Enumeration) {
                CParseEnumerationConstants(token, token_buffer,
                                           outfile, infname, flags);
            }
            else {
                /* go back to level 0 */
                CToLevelZero(token, token_buffer, outfile, infname, flags);
            }
        }
        else if (lead == '(') {
            if (after_parens) {
                /* second paren group in a row: a function typedef, so the
                 * previous token is output and the group stepped over */
                COutputToken(token, token_buffer, TypeDefinition,
                             outfile, infname, flags);
                CToLevelZero(token, token_buffer, outfile, infname, flags);
                func_typedef_tagged = TRUE;
            }
            else {
                /* first paren group: work through it and note that a
                 * following '(' identifies a function typedef */
                CParseParens(token, token_buffer, outfile, infname, flags);
                after_parens = TRUE;
                /* swap to prevent loss of token */
                CTokenSwap(token);
            }
        }
        else {
            /* another token after a paren group means that group was
             * nothing special */
            after_parens = FALSE;
        }

        /* move on to the next token */
        CTokenSwap(token);
        have_token = CNextToken(token, token_buffer, outfile,
                                infname, flags);
        tokens_seen++;
    }

    /* output the typedef names if appropriate */
    if (token->prev_token[0] != '}' && have_token) {
        /* the first name is skipped when it was already output above */
        if (!func_typedef_tagged) {
            COutputCommaDelimitedToken(token, token_buffer, TypeDefinition,
                                       outfile, infname, flags);
        }
        /* remaining names of the typedef */
        CParseCommaDelimitedList(token, token_buffer, TypeDefinition,
                                 outfile, infname, flags);
    }
}
/*----------------------------------------------------------------------------
*
* CParseClass() will parse the C++ class syntax. Take the token just before
* the first '{', ':' or ';' as the class name and run through the top level
* braces if there are any.
*
---------------------------------------------------------------------------*/
void CParseClass(Token * token, Buffer * token_buffer, FILE * outfile,
                 char *infname, Flags * flags)
{
    BOOLEAN have_token = TRUE;  /* FALSE once CNextToken() finds nothing */
    char lead;

    /* step forward until the current token is '{', ':' or ';', keeping
     * the token before it in the previous-token slot */
    for (;;) {
        lead = token->cur_token[0];
        if (!have_token || lead == '{' || lead == ':' || lead == ';') {
            break;
        }
        CTokenSwap(token);
        have_token = CNextToken(token, token_buffer, outfile,
                                infname, flags);
    }

    /* the input ran out before the class name could be delimited */
    if (!have_token) {
        return;
    }

    /* output the class name */
    COutputToken(token, token_buffer, Class, outfile, infname, flags);

    /* consume the rest of the statement up to its ';', stepping over any
     * braced body on the way */
    while (have_token && token->cur_token[0] != ';') {
        if (token->cur_token[0] == '{') {
            CToLevelZero(token, token_buffer, outfile, infname, flags);
        }
        have_token = CNextToken(token, token_buffer, outfile,
                                infname, flags);
    }
}
/*----------------------------------------------------------------------------
*
* CTags() tags an input stream assuming standard ANSI 2.0 C/C++ syntax.
* Long tokens are allowed, ANSI requires only 31 significant.
*
---------------------------------------------------------------------------*/
void CTags(FILE * infile, char *infname, FILE * outfile, Flags * flags)
{
    Buffer file_state;                      /* input buffer and its state */
    Token tok_state;                        /* token state */
    Buffer *token_buffer = &file_state;     /* convenient pointers */
    Token *token = &tok_state;
    SymbolType type;                        /* type of the current token */
    BOOLEAN have_token;                     /* set by CNextToken() */
    BOOLEAN keep_linkage_flags;             /* leave extern/static state? */

    /* init the parser engine */
    CParserInit();

    token->token_count = 0;

    /* current token: text in sbuf1, position data in the "1" fields */
    token->cur_token = token->sbuf1;
    token->cur_char_location = &(token->charloc1);
    token->cur_token_line = &(token->tokenline1);
    token->cur_line_offset = &(token->lineoffset1);
    token->sbuf1[0] = '\0';
    token->charloc1 = 0;
    token->tokenline1 = 1;
    token->lineoffset1 = 0;

    /* previous token: text in sbuf2, position data in the "2" fields */
    token->prev_token = token->sbuf2;
    token->prev_char_location = &(token->charloc2);
    token->prev_token_line = &(token->tokenline2);
    token->prev_line_offset = &(token->lineoffset2);
    token->sbuf2[0] = '\0';
    token->charloc2 = 0;
    token->tokenline2 = 1;
    token->lineoffset2 = 0;

    /* input buffer: empty, positioned at line 1 of infile */
    token_buffer->token_char_location = 0;
    token_buffer->token_line_location = 1;
    token_buffer->token_line_offset = 0;
    token_buffer->Cbuf[0] = '\0';
    token_buffer->buffer = token_buffer->Cbuf;
    token_buffer->infile = infile;

    /* no extern or static seen yet */
    token->extern_active = FALSE;
    token->CPP_extern_active = FALSE;
    token->static_active = FALSE;

    /* dispatch on the type of each top level token until the input ends */
    have_token = CNextToken(token, token_buffer, outfile, infname, flags);
    while (have_token) {
        type = CTokenType(token->cur_token);

        /* extern, static and NOP tokens manage the extern/static state
         * themselves; every other token type clears it afterwards */
        keep_linkage_flags = FALSE;

        switch (type) {
            case Extern:
                token->extern_active = TRUE;
                keep_linkage_flags = TRUE;
                break;
            case Static:
                token->static_active = TRUE;
                keep_linkage_flags = TRUE;
                break;
            case NOP:
                CParseNOP(token, token_buffer, outfile, infname, flags);
                keep_linkage_flags = TRUE;
                break;
            case Structure:
            case Enumeration:
            case Union:
                CParseDeclarationStatement(token, token_buffer, type,
                                           outfile, infname, flags);
                break;
            case TypeDefinition:
                CParseTypeDefinition(token, token_buffer, outfile,
                                     infname, flags);
                break;
            case Class:
                CParseClass(token, token_buffer, outfile, infname, flags);
                break;
            default:
                /* not reached */
                break;
        }

        if (!keep_linkage_flags) {
            token->extern_active = FALSE;
            token->CPP_extern_active = FALSE;
            token->static_active = FALSE;
        }

        /* swap state variables and get the next token */
        CTokenSwap(token);
        have_token = CNextToken(token, token_buffer, outfile,
                                infname, flags);
    }
}